#ifndef SHADER_SHADER_FSR_H
#define SHADER_SHADER_FSR_H

#include <string>

// GLSL ES 3.20 fragment shader source for the FSR "RCAS" sharpening pass.
//
// Shader interface, as declared in the source below:
//   in  (location 0) vec2      vTexcoord     - UV of the fragment being shaded.
//   out (location 0) vec4      outColor      - initialised to (1,0,0,1); r/g/b are then
//                                              overwritten by FsrRcasF, alpha stays 1.
//   uniform (binding 0)  sampler2D uTexture  - image to sharpen.
//   uniform (location 1) vec4  uTextureSize  - only .zw is read; it scales the +/-1 tap
//                                              offsets, i.e. it is used as the UV step
//                                              of one texel.
//   uniform (location 2) float uSharpness    - bit-cast to uint in main() and back to
//                                              float inside FsrRcasF, where it scales
//                                              the clamped sharpening lobe.
//
// FsrRcasF reads a 5-tap cross (b, d, e, f, h) around vTexcoord. FSR_RCAS_DENOISE is
// defined, so the lobe is additionally multiplied by the noise term `nz`.
// FSR_RCAS_PASSTHROUGH_ALPHA is not defined, so the alpha variant is compiled out.
//
// The first line is kept as an ordinary literal so its trailing padding stays visible;
// everything after it is a raw string holding the same text without per-line escapes.
const std::string fsrRCASFS =
    "#version 320 es                        \n"
    R"glsl(precision highp float;
precision highp int;

layout(location=0) in vec2 vTexcoord;
layout(location=0) out vec4 outColor;
layout(binding = 0) uniform sampler2D uTexture;
layout(location = 1) uniform vec4 uTextureSize;
layout(location = 2) uniform float uSharpness;

#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
#define FSR_RCAS_DENOISE 1

#define AU1 uint
#define AU2 uvec2
#define AU3 uvec3
#define AU4 uvec4

#define AF1 float
#define AF2 vec2
#define AF3 vec3
#define AF4 vec4

#define AExp2F1(a) exp2(AF1(a))

AF1 AF1_x(AF1 a){return AF1(a);}
AF2 AF2_x(AF1 a){return AF2(a,a);}
AF3 AF3_x(AF1 a){return AF3(a,a,a);}
AF4 AF4_x(AF1 a){return AF4(a,a,a,a);}
#define AF1_(a) AF1_x(AF1(a))
#define AF2_(a) AF2_x(AF1(a))
#define AF3_(a) AF3_x(AF1(a))
#define AF4_(a) AF4_x(AF1(a))

AU1 AU1_x(AU1 a){return AU1(a);}
AU2 AU2_x(AU1 a){return AU2(a,a);}
AU3 AU3_x(AU1 a){return AU3(a,a,a);}
AU4 AU4_x(AU1 a){return AU4(a,a,a,a);}
#define AU1_(a) AU1_x(AU1(a))
#define AU2_(a) AU2_x(AU1(a))
#define AU3_(a) AU3_x(AU1(a))
#define AU4_(a) AU4_x(AU1(a))

#define AF1_AU1(x) uintBitsToFloat(AU1(x))
#define AU1_AF1(x) floatBitsToUint(AF1(x))

AF1 ASatF1(AF1 x){return clamp(x,AF1_(0.0),AF1_(1.0));}
AF1 APrxMedRcpF1(AF1 a){AF1 b=AF1_AU1(AU1_(0x7ef19fff)-AU1_AF1(a));return b*(-b*a+AF1_(2.0));}
AF1 AMax3F1(AF1 x,AF1 y,AF1 z){return max(x,max(y,z));}
AF1 AMin3F1(AF1 x,AF1 y,AF1 z){return min(x,min(y,z));}
AF1 ARcpF1(AF1 x){return AF1_(1.0)/x;}

void FsrRcasInputF(inout AF1 r,inout AF1 g,inout AF1 b){r*=r;g*=g;b*=b;}

void FsrRcasF(
    out AF1 pixR, // Output values, non-vector so port between RcasFilter() and RcasFilterH() is easy.
    out AF1 pixG,
    out AF1 pixB,
    #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
    out AF1 pixA,
    #endif
    AU1 con)
{ // Constant generated by RcasSetup().
    // Algorithm uses minimal 3x3 pixel neighborhood.
    //    b
    //  d e f
    //    h
    AF3 b=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y)+vec2( 0.0,-1.0)*uTextureSize.zw).rgb;
    AF3 d=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y)+vec2(-1.0, 0.0)*uTextureSize.zw).rgb;
    #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
    AF4 ee=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y));
    AF3 e=ee.rgb;pixA=ee.a;
    #else
    AF3 e=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y)).rgb;
    #endif
    AF3 f=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y)+vec2( 1.0, 0.0)*uTextureSize.zw).rgb;
    AF3 h=texture(uTexture,AF2(vTexcoord.x,vTexcoord.y)+vec2( 0.0, 1.0)*uTextureSize.zw).rgb;

    // Rename (32-bit) or regroup (16-bit).
    AF1 bR=b.r;
    AF1 bG=b.g;
    AF1 bB=b.b;
    AF1 dR=d.r;
    AF1 dG=d.g;
    AF1 dB=d.b;
    AF1 eR=e.r;
    AF1 eG=e.g;
    AF1 eB=e.b;
    AF1 fR=f.r;
    AF1 fG=f.g;
    AF1 fB=f.b;
    AF1 hR=h.r;
    AF1 hG=h.g;
    AF1 hB=h.b;
    // Run optional input transform.
    //FsrRcasInputF(bR,bG,bB);
    //FsrRcasInputF(dR,dG,dB);
    //FsrRcasInputF(eR,eG,eB);
    //FsrRcasInputF(fR,fG,fB);
    //FsrRcasInputF(hR,hG,hB);
    // Luma times 2.
    AF1 bL=bB*AF1_(0.5)+(bR*AF1_(0.5)+bG);
    AF1 dL=dB*AF1_(0.5)+(dR*AF1_(0.5)+dG);
    AF1 eL=eB*AF1_(0.5)+(eR*AF1_(0.5)+eG);
    AF1 fL=fB*AF1_(0.5)+(fR*AF1_(0.5)+fG);
    AF1 hL=hB*AF1_(0.5)+(hR*AF1_(0.5)+hG);
    // Noise detection.
    AF1 nz=AF1_(0.25)*bL+AF1_(0.25)*dL+AF1_(0.25)*fL+AF1_(0.25)*hL-eL;
    nz=ASatF1(abs(nz)*APrxMedRcpF1(AMax3F1(AMax3F1(bL,dL,eL),fL,hL)-AMin3F1(AMin3F1(bL,dL,eL),fL,hL)));
    nz=AF1_(-0.5)*nz+AF1_(1.0);
    // Min and max of ring.
    AF1 mn4R=min(AMin3F1(bR,dR,fR),hR);
    AF1 mn4G=min(AMin3F1(bG,dG,fG),hG);
    AF1 mn4B=min(AMin3F1(bB,dB,fB),hB);
    AF1 mx4R=max(AMax3F1(bR,dR,fR),hR);
    AF1 mx4G=max(AMax3F1(bG,dG,fG),hG);
    AF1 mx4B=max(AMax3F1(bB,dB,fB),hB);
    // Immediate constants for peak range.
    AF2 peakC=AF2(1.0,-1.0*4.0);
    // Limiters, these need to be high precision RCPs.
    AF1 hitMinR=mn4R*ARcpF1(AF1_(4.0)*mx4R);
    AF1 hitMinG=mn4G*ARcpF1(AF1_(4.0)*mx4G);
    AF1 hitMinB=mn4B*ARcpF1(AF1_(4.0)*mx4B);
    AF1 hitMaxR=(peakC.x-mx4R)*ARcpF1(AF1_(4.0)*mn4R+peakC.y);
    AF1 hitMaxG=(peakC.x-mx4G)*ARcpF1(AF1_(4.0)*mn4G+peakC.y);
    AF1 hitMaxB=(peakC.x-mx4B)*ARcpF1(AF1_(4.0)*mn4B+peakC.y);
    AF1 lobeR=max(-hitMinR,hitMaxR);
    AF1 lobeG=max(-hitMinG,hitMaxG);
    AF1 lobeB=max(-hitMinB,hitMaxB);
    AF1 lobe=max(AF1_(-FSR_RCAS_LIMIT),min(AMax3F1(lobeR,lobeG,lobeB),AF1_(0.0)))*AF1_AU1(con);
    // Apply noise removal.
    #ifdef FSR_RCAS_DENOISE
    lobe*=nz;
    #endif
    // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
    AF1 rcpL=APrxMedRcpF1(AF1_(4.0)*lobe+AF1_(1.0));
    pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
    pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
    pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
    return;
}

void main()
{
    outColor = vec4 (1,0,0,1);
    AU1 con;
    AF1 sharpness=uSharpness;
    con=AU1_AF1(sharpness);

    FsrRcasF(outColor.r,outColor.g,outColor.b,con);
})glsl";


// GLSL ES 3.20 fragment shader source for the FSR "EASU" upscaling pass.
//
// Shader interface, as declared in the source below:
//   in  (location 0) highp vec2 vTexcoord   - UV of the fragment being shaded.
//   out (location 0) vec4       outColor    - rgb comes from the filter; alpha is set
//                                             to 1.0 in main().
//   uniform (binding 0)  sampler2D uTexture - source image.
//   uniform (location 1) UBO ubo:
//       uTextureSize   - .xy is used as the source size in texels (its reciprocal
//                        builds con1..con3); .zw is multiplied with uTextureRegion,
//                        presumably 1/width and 1/height - confirm against the caller.
//       uTextureRegion - .xy is an offset and .zw an extent applied to vTexcoord after
//                        scaling by uTextureSize.zw (looks like a sub-rectangle in
//                        texels - confirm against the caller).
//
// main() calls FsrEasuL with a fixed blend factor of 0.5: the filtered colour is mixed
// 50/50 with the plain centre sample, and FsrEasuL returns the centre sample directly
// when the squared gradient length is below 1/64. FsrEasuF (12-tap, with min/max
// de-ringing) is kept in the source but its call is commented out.
//
// Fix: both filters return colour through an `out AF3` bound to outColor.rgb, so the
// alpha channel was never written and the fragment's alpha output was undefined.
// main() now assigns outColor.a explicitly, matching the RCAS shader in this file,
// which initialises all four components.
const std::string fsrEASUFS = R"""(#version 320 es
precision highp float;
precision highp int;

struct UBO
{
vec4 uTextureSize;
vec4 uTextureRegion;
};

layout(location = 0) in highp vec2 vTexcoord;
layout(location = 0) out vec4 outColor;

layout(binding = 0) uniform sampler2D uTexture;
layout(location = 1) uniform UBO ubo;

#define AP1 bool

#define AU1 uint
#define AU2 uvec2
#define AU3 uvec3
#define AU4 uvec4

#define AF1 float
#define AF2 vec2
#define AF3 vec3
#define AF4 vec4

// half
#define AH1 mediump float
#define AH2 mediump vec2
#define AH3 mediump vec3
#define AH4 mediump vec4


AF1 AF1_x(AF1 a){return AF1(a);}
AF2 AF2_x(AF1 a){return AF2(a,a);}
AF3 AF3_x(AF1 a){return AF3(a,a,a);}
AF4 AF4_x(AF1 a){return AF4(a,a,a,a);}
#define AF1_(a) AF1_x(AF1(a))
#define AF2_(a) AF2_x(AF1(a))
#define AF3_(a) AF3_x(AF1(a))
#define AF4_(a) AF4_x(AF1(a))

AU1 AU1_x(AU1 a){return AU1(a);}
AU2 AU2_x(AU1 a){return AU2(a,a);}
AU3 AU3_x(AU1 a){return AU3(a,a,a);}
AU4 AU4_x(AU1 a){return AU4(a,a,a,a);}
#define AU1_(a) AU1_x(AU1(a))
#define AU2_(a) AU2_x(AU1(a))
#define AU3_(a) AU3_x(AU1(a))
#define AU4_(a) AU4_x(AU1(a))



AH1 AH1_x(AH1 a){AH1 tmp = a;return tmp;}
AH2 AH2_x(AH1 a){AH2 tmp = vec2(a,a); return tmp;}
AH3 AH3_x(AH1 a){AH3 tmp = vec3(a,a,a); return tmp;}
AH4 AH4_x(AH1 a){AH4 tmp = vec4(a,a,a,a); return tmp;}

#define AH1_(a) AH1_x(a)
#define AH2_(a) AH2_x(a)
#define AH3_(a) AH3_x(a)
#define AH4_(a) AH4_x(a)

#define AF1_AU1(x) uintBitsToFloat(AU1(x))
#define AF2_AU2(x) uintBitsToFloat(AU2(x))
#define AF3_AU3(x) uintBitsToFloat(AU3(x))
#define AF4_AU4(x) uintBitsToFloat(AU4(x))

#define AU1_AF1(x) floatBitsToUint(AF1(x))
#define AU2_AF2(x) floatBitsToUint(AF2(x))
#define AU3_AF3(x) floatBitsToUint(AF3(x))
#define AU4_AF4(x) floatBitsToUint(AF4(x))

AF4 FsrEasuRF(AF2 p){return AF4(textureGather(uTexture,AF2(p),0));}
AF4 FsrEasuGF(AF2 p){return AF4(textureGather(uTexture,AF2(p),1));}
AF4 FsrEasuBF(AF2 p){return AF4(textureGather(uTexture,AF2(p),2));}

AF4 FsrEasuRH(AF2 p){return vec4(textureGather(uTexture,AF2(p),0));}
AF4 FsrEasuGH(AF2 p){return vec4(textureGather(uTexture,AF2(p),1));}
AF4 FsrEasuBH(AF2 p){return vec4(textureGather(uTexture,AF2(p),2));}


AH4 FsrEasuSampleH(AF2 p){return vec4(texture(uTexture,AF2(p)));}

AF1 APrxLoRcpF1(AF1 a){return AF1_AU1(AU1_(0x7ef07ebb)-AU1_AF1(a));}
AF1 ASatF1(AF1 x){return clamp(x,AF1_(0.0),AF1_(1.0));}
AF1 APrxLoRsqF1(AF1 a){return AF1_AU1(AU1_(0x5f347d74)-(AU1_AF1(a)>>AU1_(1)));}
AF3 AMin3F3(AF3 x,AF3 y,AF3 z){return min(x,min(y,z));}
AF3 AMax3F3(AF3 x,AF3 y,AF3 z){return max(x,max(y,z));}
AF1 ARcpF1(AF1 x){return AF1_(1.0)/x;}


AH1 ARsqH1(AH1 x){return (1.0)/sqrt(x);}
AF1 ASatH1(AF1 x){return clamp(x,(0.0),(1.0));}
AF1 ARcpH1(AF1 x){return (1.0)/x;}


// Filtering for a given tap for the scalar.
void FsrEasuTapF(
inout AF3 aC, // Accumulated color, with negative lobe.
inout AF1 aW, // Accumulated weight.
AF2 off, // Pixel offset from resolve position to tap.
AF2 dir, // Gradient direction.
AF2 len, // Length.
AF1 lob, // Negative lobe strength.
AF1 clp, // Clipping point.
AF3 c)
{
// Tap color.
// Rotate offset by direction.
AF2 v;
v.x=(off.x*( dir.x))+(off.y*dir.y);
v.y=(off.x*(-dir.y))+(off.y*dir.x);
// Anisotropy.
v*=len;
// Compute distance^2.
AF1 d2=v.x*v.x+v.y*v.y;
// Limit to the window as at corner, 2 taps can easily be outside.
d2=min(d2,clp);
// Approximation of lancos2 without sin() or rcp(), or sqrt() to get x.
//  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
//  |_______________________________________|   |_______________|
//                   base                             window
// The general form of the 'base' is,
//  (a*(b*x^2-1)^2-(a-1))
// Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
AF1 wB=AF1_(2.0/5.0)*d2+AF1_(-1.0);
AF1 wA=lob*d2+AF1_(-1.0);
wB*=wB;
wA*=wA;
wB=AF1_(25.0/16.0)*wB+AF1_(-(25.0/16.0-1.0));
AF1 w=wB*wA;
// Do weighted average.
aC+=c*w;aW+=w;
}

void FsrEasuTapH(                              // 不变，半精度float计算
inout AH2 aCR,inout AH2 aCG,inout AH2 aCB,
inout AH2 aW,
AH2 offX,AH2 offY,
AH2 dir,
AH2 len,
AH1 lob,
AH1 clp,
AH2 cR,AH2 cG,AH2 cB){
AH2 vX,vY;
vX=offX*  dir.xx +offY*dir.yy;
vY=offX*(-dir.yy)+offY*dir.xx;
vX*=len.x;vY*=len.y;
AH2 d2=vX*vX+vY*vY;
d2=min(d2,AH2_(clp));
AH2 wB=AH2_(2.0/5.0)*d2+AH2_(-1.0);
AH2 wA=AH2_(lob)*d2+AH2_(-1.0);
wB*=wB;
wA*=wA;
wB=AH2_(25.0/16.0)*wB+AH2_(-(25.0/16.0-1.0));
AH2 w=wB*wA;
aCR+=cR*w;aCG+=cG*w;aCB+=cB*w;aW+=w;}

// Accumulate direction and length.
void FsrEasuSetF(
inout AF2 dir,
inout AF1 len,
AF2 pp,
AP1 biS,AP1 biT,AP1 biU,AP1 biV,
AF1 lA,AF1 lB,AF1 lC,AF1 lD,AF1 lE)
{
// Compute bilinear weight, branches factor out as predicates are compiler time immediates.
//  s t
//  u v
AF1 w;
if(biS)w=(AF1_(1.0)-pp.x)*(AF1_(1.0)-pp.y);
if(biT)w=           pp.x *(AF1_(1.0)-pp.y);
if(biU)w=(AF1_(1.0)-pp.x)*           pp.y ;
if(biV)w=           pp.x *           pp.y ;
// Direction is the '+' diff.
//    a
//  b c d
//    e
// Then takes magnitude from abs average of both sides of 'c'.
// Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped,    // then adding horz and vert terms.
AF1 dc=lD-lC;
AF1 cb=lC-lB;
AF1 lenX=max(abs(dc),abs(cb));
lenX=APrxLoRcpF1(lenX);
AF1 dirX=lD-lB;
dir.x+=dirX*w;
lenX=ASatF1(abs(dirX)*lenX);
lenX*=lenX;
len+=lenX*w;
// Repeat for the y axis.
AF1 ec=lE-lC;
AF1 ca=lC-lA;
AF1 lenY=max(abs(ec),abs(ca));
lenY=APrxLoRcpF1(lenY);
AF1 dirY=lE-lA;
dir.y+=dirY*w;
lenY=ASatF1(abs(dirY)*lenY);
lenY*=lenY;
len+=lenY*w;
}

void FsrEasuF(
out AF3 pix,
AU4 con1,
AU4 con2,
AU4 con3)
{
highp float texCoordX = vTexcoord.x * (ubo.uTextureRegion.z * ubo.uTextureSize.z) +  (ubo.uTextureRegion.x * ubo.uTextureSize.z);
highp float texCoordY = vTexcoord.y * (ubo.uTextureRegion.w * ubo.uTextureSize.w) +  (ubo.uTextureRegion.y * ubo.uTextureSize.w);
highp vec2 texCoord = vec2(texCoordX, texCoordY);

// Get position of 'f'.
AF2 pp = AF2(texCoord * ubo.uTextureSize.xy - 0.5);
AF2 fp=floor(pp);
pp-=fp;

// Allowing dead-code removal to remove the 'z's.
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
// These are from p0 to avoid pulling two constants on pre-Navi hardware.
AF2 p1=p0+AF2_AU2(con2.xy);
AF2 p2=p0+AF2_AU2(con2.zw);
AF2 p3=p0+AF2_AU2(con3.xy);
AF4 bczzR=FsrEasuRF(p0);
AF4 bczzG=FsrEasuGF(p0);
AF4 bczzB=FsrEasuBF(p0);
AF4 ijfeR=FsrEasuRF(p1);
AF4 ijfeG=FsrEasuGF(p1);
AF4 ijfeB=FsrEasuBF(p1);
AF4 klhgR=FsrEasuRF(p2);
AF4 klhgG=FsrEasuGF(p2);
AF4 klhgB=FsrEasuBF(p2);
AF4 zzonR=FsrEasuRF(p3);
AF4 zzonG=FsrEasuGF(p3);
AF4 zzonB=FsrEasuBF(p3);

// Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
AF4 bczzL=bczzB*AF4_(0.5)+(bczzR*AF4_(0.5)+bczzG);
AF4 ijfeL=ijfeB*AF4_(0.5)+(ijfeR*AF4_(0.5)+ijfeG);
AF4 klhgL=klhgB*AF4_(0.5)+(klhgR*AF4_(0.5)+klhgG);
AF4 zzonL=zzonB*AF4_(0.5)+(zzonR*AF4_(0.5)+zzonG);
// Rename.
AF1 bL=bczzL.x;
AF1 cL=bczzL.y;
AF1 iL=ijfeL.x;
AF1 jL=ijfeL.y;
AF1 fL=ijfeL.z;
AF1 eL=ijfeL.w;
AF1 kL=klhgL.x;
AF1 lL=klhgL.y;
AF1 hL=klhgL.z;
AF1 gL=klhgL.w;
AF1 oL=zzonL.z;
AF1 nL=zzonL.w;
// Accumulate for bilinear interpolation.
AF2 dir=AF2_(0.0);
AF1 len=AF1_(0.0);
FsrEasuSetF(dir,len,pp,true, false,false,false,bL,eL,fL,gL,jL);
FsrEasuSetF(dir,len,pp,false,true ,false,false,cL,fL,gL,hL,kL);
FsrEasuSetF(dir,len,pp,false,false,true ,false,fL,iL,jL,kL,nL);
FsrEasuSetF(dir,len,pp,false,false,false,true ,gL,jL,kL,lL,oL);

// Normalize with approximation, and cleanup close to zero.
AF2 dir2=dir*dir;
AF1 dirR=dir2.x+dir2.y;
AP1 zro=dirR<AF1_(1.0/32768.0);
dirR=APrxLoRsqF1(dirR);
dirR=zro?AF1_(1.0):dirR;
dir.x=zro?AF1_(1.0):dir.x;
dir*=AF2_(dirR);
// Transform from {0 to 2} to {0 to 1} range, and shape with square.
len=len*AF1_(0.5);
len*=len;
// Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
AF1 stretch=(dir.x*dir.x+dir.y*dir.y)*APrxLoRcpF1(max(abs(dir.x),abs(dir.y)));
// Anisotropic length after rotation,
//  x := 1.0 lerp to 'stretch' on edges
//  y := 1.0 lerp to 2x on edges
AF2 len2=AF2(AF1_(1.0)+(stretch-AF1_(1.0))*len,AF1_(1.0)+AF1_(-0.5)*len);
// Based on the amount of 'edge',
// the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
AF1 lob=AF1_(0.5)+AF1_((1.0/4.0-0.04)-0.5)*len;
// Set distance^2 clipping point to the end of the adjustable window.
AF1 clp=APrxLoRcpF1(lob);

// Accumulation mixed with min/max of 4 nearest.
//    b c
//  e f g h
//  i j k l
//    n o
AF3 min4=min(AMin3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),
      AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
      AF3(klhgR.x,klhgG.x,klhgB.x));
AF3 max4=max(AMax3F3(AF3(ijfeR.z,ijfeG.z,ijfeB.z),AF3(klhgR.w,klhgG.w,klhgB.w),
      AF3(ijfeR.y,ijfeG.y,ijfeB.y)),
      AF3(klhgR.x,klhgG.x,klhgB.x));
// Accumulation.
AF3 aC=AF3_(0.0);
AF1 aW=AF1_(0.0);
FsrEasuTapF(aC,aW,AF2( 0.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.x,bczzG.x,bczzB.x)); // b
FsrEasuTapF(aC,aW,AF2( 1.0,-1.0)-pp,dir,len2,lob,clp,AF3(bczzR.y,bczzG.y,bczzB.y)); // c
FsrEasuTapF(aC,aW,AF2(-1.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.x,ijfeG.x,ijfeB.x)); // i
FsrEasuTapF(aC,aW,AF2( 0.0, 1.0)-pp,dir,len2,lob,clp,AF3(ijfeR.y,ijfeG.y,ijfeB.y)); // j
FsrEasuTapF(aC,aW,AF2( 0.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.z,ijfeG.z,ijfeB.z)); // f
FsrEasuTapF(aC,aW,AF2(-1.0, 0.0)-pp,dir,len2,lob,clp,AF3(ijfeR.w,ijfeG.w,ijfeB.w)); // e
FsrEasuTapF(aC,aW,AF2( 1.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.x,klhgG.x,klhgB.x)); // k
FsrEasuTapF(aC,aW,AF2( 2.0, 1.0)-pp,dir,len2,lob,clp,AF3(klhgR.y,klhgG.y,klhgB.y)); // l
FsrEasuTapF(aC,aW,AF2( 2.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.z,klhgG.z,klhgB.z)); // h
FsrEasuTapF(aC,aW,AF2( 1.0, 0.0)-pp,dir,len2,lob,clp,AF3(klhgR.w,klhgG.w,klhgB.w)); // g
FsrEasuTapF(aC,aW,AF2( 1.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.z,zzonG.z,zzonB.z)); // o
FsrEasuTapF(aC,aW,AF2( 0.0, 2.0)-pp,dir,len2,lob,clp,AF3(zzonR.w,zzonG.w,zzonB.w)); // n

// Normalize and dering.
pix=min(max4,max(min4,aC*AF3_(ARcpF1(aW))));
}

void FsrEasuL(
out AF3 pix,
AU4 con1,
AU4 con2,
AU4 con3,
AH1 factor) {
//------------------------------------------------------------------------------------------------------------------------------
// Direction is the '+' diff.
//    A
//  B C D
//    E
highp float texCoordX = vTexcoord.x * (ubo.uTextureRegion.z * ubo.uTextureSize.z) +  (ubo.uTextureRegion.x * ubo.uTextureSize.z);
highp float texCoordY = vTexcoord.y * (ubo.uTextureRegion.w * ubo.uTextureSize.w) +  (ubo.uTextureRegion.y * ubo.uTextureSize.w);
highp vec2 texCoord = vec2(texCoordX, texCoordY);
AF2 pp=AF2(texCoord * ubo.uTextureSize.xy - 0.5);
AF2 tc=(pp+AF2_(0.5))*AF2_AU2(con1.xy);
//   AH3 sA=FsrEasuSampleH(tc-AF2(0, AF1_AU1(con1.y)));
//   AH3 sB=FsrEasuSampleH(tc-AF2(AF1_AU1(con1.x), 0));
//   AH3 sC=FsrEasuSampleH(tc);
//   AH3 sD=FsrEasuSampleH(tc+AF2(AF1_AU1(con1.x), 0));
//   AH3 sE=FsrEasuSampleH(tc+AF2(0, AF1_AU1(con1.y)));
AH4 sA=FsrEasuSampleH(tc-AF2(0, AF1_AU1(con1.y)));
AH4 sB=FsrEasuSampleH(tc-AF2(AF1_AU1(con1.x), 0));
AH4 sC=FsrEasuSampleH(tc);
AH4 sD=FsrEasuSampleH(tc+AF2(AF1_AU1(con1.x), 0));
AH4 sE=FsrEasuSampleH(tc+AF2(0, AF1_AU1(con1.y)));
AH1 lA=sA.r*AH1_(0.5)+sA.g;
AH1 lB=sB.r*AH1_(0.5)+sB.g;
AH1 lC=sC.r*AH1_(0.5)+sC.g;
AH1 lD=sD.r*AH1_(0.5)+sD.g;
AH1 lE=sE.r*AH1_(0.5)+sE.g;
// Then takes magnitude from abs average of both sides of 'C'.
// Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
AH1 dc=lD-lC;
AH1 cb=lC-lB;
AH1 lenX=max(abs(dc),abs(cb));
lenX=ARcpH1(lenX);
AH1 dirX=lD-lB;
lenX=ASatH1(abs(dirX)*lenX);
lenX*=lenX;
// Repeat for the y axis.
AH1 ec=lE-lC;
AH1 ca=lC-lA;
AH1 lenY=max(abs(ec),abs(ca));
lenY=ARcpH1(lenY);
AH1 dirY=lE-lA;
lenY=ASatH1(abs(dirY)*lenY);
AH1 len = lenY * lenY + lenX;
AH2 dir = vec2(dirX, dirY);
//------------------------------------------------------------------------------------------------------------------------------
AH2 dir2=dir*dir;
AH1 dirR=dir2.x+dir2.y;
if (dirR<AH1_(1.0/64.0)) {
pix = sC.rgb;
return;
}
dirR=ARsqH1(dirR);
dir*=AH2_(dirR);
len=len*AH1_(0.5);
len*=len;
AH1 stretch=(dir.x*dir.x+dir.y*dir.y)*ARcpH1(max(abs(dir.x),abs(dir.y)));
AH2 len2=vec2(AH1_(1.0)+(stretch-AH1_(1.0))*len,AH1_(1.0)+AH1_(-0.5)*len);
AH1 lob=AH1_(0.5)+AH1_((1.0/4.0-0.04)-0.5)*len;
AH1 clp=ARcpH1(lob);
//------------------------------------------------------------------------------------------------------------------------------
AF2 fp=floor(pp);
pp-=fp;
AH2 ppp=vec2(pp);
AF2 p0=fp*AF2_AU2(con1.xy)+AF2_AU2(con1.zw);
AF2 p1=p0+AF2_AU2(con2.xy);
AF2 p2=p0+AF2_AU2(con2.zw);
AF2 p3=p0+AF2_AU2(con3.xy);
p0.y-=AF1_AU1(con1.w); p3.y+=AF1_AU1(con1.w);
AH4 fgcbR=FsrEasuRH(p0);
AH4 fgcbG=FsrEasuGH(p0);
AH4 fgcbB=FsrEasuBH(p0);
AH4 ijfeR=FsrEasuRH(p1);
AH4 ijfeG=FsrEasuGH(p1);
AH4 ijfeB=FsrEasuBH(p1);
AH4 klhgR=FsrEasuRH(p2);
AH4 klhgG=FsrEasuGH(p2);
AH4 klhgB=FsrEasuBH(p2);
AH4 nokjR=FsrEasuRH(p3);
AH4 nokjG=FsrEasuGH(p3);
AH4 nokjB=FsrEasuBH(p3);
//------------------------------------------------------------------------------------------------------------------------------
// This part is different for FP16, working pairs of taps at a time.
AH2 pR=AH2_(0.0);
AH2 pG=AH2_(0.0);
AH2 pB=AH2_(0.0);
AH2 pW=AH2_(0.0);
FsrEasuTapH(pR,pG,pB,pW,vec2( 1.0, 0.0)-ppp.xx,vec2(-1.0,-1.0)-ppp.yy,dir,len2,lob,clp,fgcbR.zw,fgcbG.zw,fgcbB.zw);
FsrEasuTapH(pR,pG,pB,pW,vec2(-1.0, 0.0)-ppp.xx,vec2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,ijfeR.xy,ijfeG.xy,ijfeB.xy);
FsrEasuTapH(pR,pG,pB,pW,vec2( 0.0,-1.0)-ppp.xx,vec2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,ijfeR.zw,ijfeG.zw,ijfeB.zw);
FsrEasuTapH(pR,pG,pB,pW,vec2( 1.0, 2.0)-ppp.xx,vec2( 1.0, 1.0)-ppp.yy,dir,len2,lob,clp,klhgR.xy,klhgG.xy,klhgB.xy);
FsrEasuTapH(pR,pG,pB,pW,vec2( 2.0, 1.0)-ppp.xx,vec2( 0.0, 0.0)-ppp.yy,dir,len2,lob,clp,klhgR.zw,klhgG.zw,klhgB.zw);
FsrEasuTapH(pR,pG,pB,pW,vec2( 0.0, 1.0)-ppp.xx,vec2( 2.0, 2.0)-ppp.yy,dir,len2,lob,clp,nokjR.xy,nokjG.xy,nokjB.xy);
AH3 aC=vec3(pR.x+pR.y,pG.x+pG.y,pB.x+pB.y);
AH1 aW=pW.x+pW.y;
//------------------------------------------------------------------------------------------------------------------------------
pix=aC*AH3_(ARcpH1(aW));
pix = mix(sC.rgb, pix, factor);
}


void main()
{
AU4 con1,con2,con3;
con1.x=AU1_AF1(ARcpF1(ubo.uTextureSize.x));
con1.y=AU1_AF1(ARcpF1(ubo.uTextureSize.y));
con1.z=AU1_AF1(AF1_( 1.0)*ARcpF1(ubo.uTextureSize.x));
con1.w=AU1_AF1(AF1_(-1.0)*ARcpF1(ubo.uTextureSize.y));
con2.x=AU1_AF1(AF1_(-1.0)*ARcpF1(ubo.uTextureSize.x));
con2.y=AU1_AF1(AF1_( 2.0)*ARcpF1(ubo.uTextureSize.y));
con2.z=AU1_AF1(AF1_( 1.0)*ARcpF1(ubo.uTextureSize.x));
con2.w=AU1_AF1(AF1_( 2.0)*ARcpF1(ubo.uTextureSize.y));
con3.x=AU1_AF1(AF1_( 0.0)*ARcpF1(ubo.uTextureSize.x));
con3.y=AU1_AF1(AF1_( 4.0)*ARcpF1(ubo.uTextureSize.y));
AH1 factor = AH1_(0.5f);
// FsrEasuF(outColor.rgb,con1,con2,con3);
FsrEasuL(outColor.rgb,con1,con2,con3,factor);
// The filter only produces rgb; write alpha so the fragment output is fully defined.
outColor.a = 1.0;
}

)""";

// GLSL ES 3.20 vertex shader source shared by the FSR passes in this header.
//
// It takes no vertex attributes: the position is derived from gl_VertexID alone.
// For IDs 0, 1, 2 the expression yields vTexcoord = (0,0), (2,0), (0,2), and
// gl_Position = vTexcoord * 2 - 1 gives (-1,-1), (3,-1), (-1,3) - one triangle that
// covers the whole clip-space square, with vTexcoord spanning [0,1] inside it.
//
// Adjacent literals are concatenated by the compiler, so no line-continuation
// backslashes are needed. The padding spaces inside the literals are part of the
// shader text and are kept as they were.
const std::string fsrVS =
    "#version 320 es                        \n"
    "layout(location = 0) out highp vec2 vTexcoord;              \n"
    "void main()                            \n"
    "{                                      \n"
    "    vTexcoord = vec2((gl_VertexID << 1) & 2, (gl_VertexID & 2));                                         \n"
    "    gl_Position = vec4(vTexcoord.x * 2.0f - 1.0f,  (vTexcoord.y * 2.0f - 1.0f) , 0.0f, 1.0f); \n"
    "}";
#endif //SHADER_SHADER_FSR_H
